####################################
# Code to create plots for:        #
#                                  #
# "From Economic Competition to    #
# Military Combat: Export          #
# Similarity and International     #
# Conflict" (main text)            #
#                                  #
# J. Tyson Chatagnier and Kerim    #
# Can Kavakli                      #
#                                  #
# Published in the Journal of      #
# Conflict Resolution              #
####################################

library(foreign)
library(reshape)
library(ggplot2)

rm(list = ls(all = TRUE))

#######################
# Define function to  #
# calculate predicted #
# values.             #
#######################

GetPreds <- function(xmat,
                     coefs,
                     covmat,
                     var.change,
                     interaction = NULL,
                     change.lo   = NULL,
                     change.hi   = NULL,
                     change.vec  = NULL,
                     var.sq      = NULL,
                     app.fn      = "median",
                     t.diff      = NULL,
                     tval        = 1.96,
                     n.out       = 1000,
                     ci          = FALSE
                     ) {
                        # xmat is a matrix of x variables
                        # Note that xmat should include a constant.
                        # coefs is a vector of coefficient values
                        # covmat is the covariance matrix
                        # var.change is a number specifying which X
                        # is the variable of interest. This should
                        # be a number greater than 1, since a 1 would
                        # indicate the intercept.
                        # Interaction is a vector of length 2, with
                        # a first element that gives the column
                        # representing the variable with which
                        # var.change is to be interacted, and the
                        # second giving the column in which the
                        # interaction should be stored.
                        # var.sq is used if we want to include a 
                        # squared term. Just tell getPreds() which
                        # column that should be.
    k <- ncol(xmat)
    b <- coefs
    if (is.null(change.vec)) {
        change.lo <- ifelse(is.null(change.lo),
                            min(xmat[, var.change]),
                            change.lo
        )
        change.hi <- ifelse(is.null(change.hi),
                            max(xmat[, var.change]),
                            change.hi
        )
        change.vec <- seq(from       = change.lo,
                          to         = change.hi,
                          length.out = n.out
        )
    }
    if (is.numeric(app.fn)) {
        xc <- app.fn
    } else {
        x.nona <- na.omit(xmat)
        xc <- apply(x.nona,
                    2,
                    app.fn
        )
                        # Make sure there are no NAs when we apply the
                        # function to the matrix.
    }
    if (is.numeric(t.diff)) {
                        # t.diff is used to change the time
                        # easily, since we might care about
                        # particular values. It is a vector
                        # with two elements: the first element
                        # is the location of the original time
                        # variable, and the second is its
                        # desired value.
        t.vals <- c(t.diff[2],
                    t.diff[2] ^ 2,
                    t.diff[2] ^ 3
                    )
        xc[t.diff[1] : (t.diff[1] + 2)] <- t.vals
    }
    xc.mat <- matrix(rep(xc,
                         each = n.out
                     ),
                     ncol = k,
                     nrow = n.out
    )
                        # Create a matrix of constant values.
    xc.mat[, var.change] <- change.vec
                        # Vary the variable that needs varying.
    if (is.numeric(var.sq)) {
      xc.mat[, var.sq] <- change.vec ^ 2
    }
    if (length(interaction) == 2) {
      xc.mat[, interaction[2]] <- change.vec * xc[interaction[1]]
    }
    if (ncol(xc.mat) != nrow(as.matrix(b))) {
      b <- t(as.matrix(b))
    }
    xb <- xc.mat %*% b
    preds <- plogis(xb)
    if (ci) {
        deriv <- dlogis(xb)
                        # dlogis() is the derivative of plogis()
        deriv.mat <- sweep(xc.mat,
                           1,
                           deriv,
                           '*'
                     )
        var.pp <- deriv.mat %*% covmat %*% t(deriv.mat)
        se.pp <- sqrt(diag(var.pp))
        ci.lo <- preds - (tval * se.pp)
        ci.lo <- ifelse(ci.lo < 0,
                        0,
                        ifelse(ci.lo > 1,
                               1,
                               ci.lo)
                        )
        ci.hi <- preds + (tval * se.pp)
        ci.hi <- ifelse(ci.hi < 0,
                        0,
                        ifelse(ci.hi > 1,
                               1,
                               ci.hi)
                        )
        return(list(pred = preds,
                    change = change.vec,
                    lo = ci.lo,
                    hi = ci.hi
                    )
        )
    } else {
        return(list(pred = preds,
                    change = change.vec
                    )
        )
    }
}


###########################
# Read in data and create #
# necessary variables     #
###########################

sim.data <- read.dta("export_similarity_data.dta")

sim.data$dyadid <- sim.data$ccode1 + 0.001 * sim.data$ccode2
                        # Create a unique dyad identifier

######################
# Average similarity #
######################

fig.vals <- rbind(by(sim.data$exports,
                     list(sim.data$year,
                          sim.data$mid_init
                          ),
                     function(x) {
                       mean(x,
                            na.rm = TRUE
                            )
                     }
                     )
                  )

vals <- melt(fig.vals)
                        # Get a matrix with average similarity
                        # scores for MID and non-MID dyads.

vals <- na.omit(vals)
                        # Remove the years for which we do not have
                        # similarity data.

colnames(vals) <- c("year",
                    "mid_init",
                    "exports"
                    )

#################
# Create Fig. 1 #
#################

ggplot(data = vals, 
       aes(x        = year, 
           y        = exports, 
           group    = as.factor(mid_init), 
           colour   = as.factor(mid_init), 
           linetype = as.factor(mid_init), 
           shape    = as.factor(mid_init)
           )
       ) + 
  geom_point() + 
  geom_smooth(se     = FALSE, 
              size   = 1.25,
              method = "loess"
              ) +  
  xlab("") + 
  ylab("Average Export Similarity\n") +
  scale_colour_manual(values = c("red",
                                 "blue"
                                 )
                      ) + 
  annotate(geom          = "text",
           x             = 1990,
           y             = 0.35,
           label         = "MID Dyads",
           colour        = "black",
           size          = 5,
           fontface      = "bold"
           ) +     
  annotate(geom     = "text",
           x        = 1990,
           y        = 0.12,
           label    = "Non-MID Dyads",
           colour   = "black",
           size     = 5,
           fontface = "bold"
           ) +    
  theme_bw() +     
  theme(axis.text.x     = element_text(face ="bold", size=12),
        axis.text.y     = element_text(face ="bold", size=12),
        axis.title.y    = element_text(face ="bold", size=14),
        legend.position = "none"
        ) 

##############################
# Create lagged versions     #
# of some variables, as well #
# as dummies for decades.    #
##############################

library(DataCombine)

sim.data$l.caprat <- slide(data.frame(var    = sim.data$pwin_cap,
                                      dyadid = sim.data$dyadid,
                                      year   = sim.data$year
                                      ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]
                        # Use the slide() function from the DataCombine
                        # package to lag by one unit. The fourth column
                        # of the resulting matrix is the lagged vector.

sim.data$l.caprat2 <- sim.data$l.caprat ^ 2

sim.data$l.allied <- slide(data.frame(var    = sim.data$allied,
                                     dyadid = sim.data$dyadid,
                                     year   = sim.data$year
                                     ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]

sim.data$l.lodem <- slide(data.frame(var     = sim.data$smldem,
                                     dyadid = sim.data$dyadid,
                                     year   = sim.data$year
                                     ),
                          Var      = "var",
                          GroupVar = "dyadid",
                          slideBy  = -1
                          )[, 4]

sim.data$l.s <- slide(data.frame(var    = sim.data$s3un,
                                 dyadid = sim.data$dyadid,
                                 year   = sim.data$year
                                 ),
                      Var      = "var",
                      GroupVar = "dyadid",
                      slideBy  = -1
                      )[, 4]


sim.data$sixties <- ifelse(sim.data$year < 1970,
                           1,
                           0
                           )

sim.data$seventies <- ifelse(sim.data$year >= 1970 & sim.data$year < 1980,
                             1,
                             0
                             )

sim.data$eighties <- ifelse(sim.data$year >= 1980 & sim.data$year < 1990,
                            1,
                            0
                            )

sim.data$nineties <- ifelse(sim.data$year >= 1990,
                            1,
                            0
                            )
                        # We are combining 2000 into the 90s.

sim.data$l.sim <- slide(data.frame(var    = sim.data$exports,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.dep <- slide(data.frame(var    = sim.data$lowerdep2015,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.riv <- slide(data.frame(var    = sim.data$rivalry,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.jointdem <- slide(data.frame(var    = sim.data$jointdem,
                                        dyadid = sim.data$dyadid,
                                        year   = sim.data$year
                                        ),
                             Var      = "var",
                             GroupVar = "dyadid",
                             slideBy  = -1
                             )[, 4]

sim.data$l.rawexp <- slide(data.frame(var    = sim.data$rawexports,
                                      dyadid = sim.data$dyadid,
                                      year   = sim.data$year
                                      ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]

sim.data$l.raw_dep <- slide(data.frame(var    = sim.data$lower_rawdep2015,
                                       dyadid = sim.data$dyadid,
                                       year   = sim.data$year
                                       ),
                            Var      = "var",
                            GroupVar = "dyadid",
                            slideBy  = -1
                            )[, 4]

sim.data$l.manexp <- slide(data.frame(var    = sim.data$manexports,
                                      dyadid = sim.data$dyadid,
                                      year   = sim.data$year
                                      ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]

sim.data$l.man_dep <- slide(data.frame(var    = sim.data$lower_mandep2015,
                                       dyadid = sim.data$dyadid,
                                       year   = sim.data$year
                                       ),
                            Var      = "var",
                            GroupVar = "dyadid",
                            slideBy  = -1
                            )[, 4]


#############################
# Plot effect of similarity #
#############################

b <- as.numeric(read.table("beta_maingraph.txt"))

vcov <- as.matrix(read.table("vcov_maingraph.txt"))
                        # Estimation is performed in Stata. Results
                        # are output to .txt files, which can be 
                        # read into R.

rel.data <- data.frame(py        = sim.data$mid_py,
                       py2       = sim.data$mid_py2,
                       py3       = sim.data$mid_py3,
                       contig    = sim.data$contig_dummy,
                       lndist    = sim.data$lndist,
                       caprat    = sim.data$l.caprat,
                       jointdem  = sim.data$l.jointdem,
                       l.s       = sim.data$l.s,
                       majmaj    = sim.data$majmaj,
                       minmaj    = sim.data$majmin,
                       rivals    = sim.data$l.riv,
                       europe    = sim.data$both_europe,
                       sixties   = sim.data$sixties,
                       seventies = sim.data$seventies,
                       eighties  = sim.data$eighties,
                       trade     = sim.data$l.dep,
                       sim       = sim.data$l.sim,
                       constant  = 1
                       )

colnames(rel.data) <- c("t",
	                      "t^2",
	                      "t^3",
	                      "contig",
	                      "lndist",
	                      "initcapshare",
                        "jointdem",
                        "s",
	                      "majmaj",
	                      "minmaj",
                        "rivals",
	                      "europe",
                        "sixties",
	                      "seventies",
	                      "eighties",
	                      "dependence",
	                      "similarity",
	                      "constant"
	                      )

val.vec <- c(1,      # t
             1,      # t^2
             1,      # t^3
             1,      # contig
             0,      # lndist
             0.8,    # initcapshare
             0,      # jointdem
             0.67,   # s
             0,      # majmaj
             0,      # minmaj
             0,      # rivalry
             0,      # europe
             0,      # 1960s
             0,      # 1970s
             0,      # 1980s
             0.12,  # dependence
             0.01,   # similarity,
             1       # constant
             )  
                        # Most values set near the empirical median
                        # Trade and similarity vary between high and
                        # low values across the two vectors


#####################
# Plot effects here #
#####################

sim.vals <- GetPreds(xmat           = rel.data,
                     coefs          = b,
                     covmat         = vcov,
                     var.change     = 17,
                     interaction    = NULL,
                     change.lo      = min(rel.sim.data$similarity, 
                                          na.rm = TRUE
                                          ),
                     change.hi      = max(rel.sim.data$similarity, 
                                          na.rm = TRUE
                                          ),
                     change.vec     = NULL,
                     var.sq         = NULL,
                     app.fn         = val.vec,
                     tval           = 1.96,
                     n.out          = 1000,
                     ci             = TRUE
                     )

sim.df <- data.frame(change     = sim.vals$change,
                     pred       = sim.vals$pred,
                     lo         = sim.vals$lo,
                     hi         = sim.vals$hi
                     )

#################
# Create Fig. 2 #
#################

pdf("sim_effect.pdf")
  ggplot(data = sim.df,
         aes(x = change,
             y = pred
             )
         ) +
         geom_line(size   = 1.25,
                   colour = "darkblue"
                   ) +
         geom_ribbon(data = sim.df,
                     aes(ymin = lo,
                         ymax = hi
                         ),
                     fill   = "darkblue",
                     alpha  = 0.3,
                     colour = NA
                     ) +
         xlab("Portfolio Similarity") +
         ylab("Probability of Conflict") +
         theme_bw()
dev.off()

##############################
# Raw vs. manufactured goods #
##############################

b.rm <- as.numeric(read.table("beta_rawmanuf.txt"))

vcov.rm <- as.matrix(read.table("vcov_rawmanuf.txt"))
                        # Analysis performed in Stata. Import
                        # results here.

rel.data <- data.frame(py        = sim.data$mid_py,
                       py2       = sim.data$mid_py2,
                       py3       = sim.data$mid_py3,
                       contig    = sim.data$contig_dummy,
                       lndist    = sim.data$lndist,
                       caprat    = sim.data$l.caprat,
                       jointdem  = sim.data$l.jointdem,
                       l.s       = sim.data$l.s,
                       majmaj    = sim.data$majmaj,
                       minmaj    = sim.data$majmin,
                       rivals    = sim.data$l.riv,
                       europe    = sim.data$both_europe,
                       sixties   = sim.data$sixties,
                       seventies = sim.data$seventies,
                       eighties  = sim.data$eighties,
                       rawdep    = sim.data$l.raw_dep,
                       mandep    = sim.data$l.man_dep,
                       rawexp    = sim.data$l.rawexp,
                       manexp    = sim.data$l.manexp,
                       constant  = 1
                       )

colnames(rel.data) <- c("t",
                        "t^2",
                        "t^3",
                        "contig",
                        "lndist",
                        "initcapshare",
                        "jointdem",
                        "s",
                        "majmaj",
                        "minmaj",
                        "rivals",
                        "europe",
                        "1960s",
                        "1970s",
                        "1980s",
                        "rawdep",
                        "mandep",
                        "rawsim",
                        "mansim",
                        "constant"
                        )

val.vec <- c(1,      # t
             1,      # t^2
             1,      # t^3
             1,      # contig
             0,      # lndist
             0.75,   # initcapshare
             0,      # jointdem
             0.75,   # s
             0,      # majmaj
             0,      # minmaj
             0,      # rivalry
             0,      # europe
             0,      # 1960s
             0,      # 1970s
             0,      # 1980s
             0.001,  # raw dependence
             0.001,  # man dependence
             0.01,   # raw similarity,
             0.03,   # man similarity,
             1       # constant
             )  
 
                        # Values are set to their medians for most
                        # Trade and similarity vary between high and
                        # low values across the two vectors


#####################
# Plot effects here #
#####################

rawman.min <- min(c(rel.data$rawsim,
                    rel.data$mansim
                    ),
                  na.rm = TRUE
                  )
rawman.max <- max(c(rel.data$rawsim,
                    rel.data$mansim
                    ),
                  na.rm = TRUE
                  )

sim.raw <- GetPreds(xmat           = rel.data,
                    coefs          = b.rm,
                    covmat         = vcov.rm,
                    var.change     = 18,
                    interaction    = NULL,
                    change.lo      = rawman.min,
                    change.hi      = rawman.max,
                    change.vec     = NULL,
                    var.sq         = NULL,
                    app.fn         = val.vec,
                    tval           = 1.96,
                    n.out          = 1000,
                    ci             = TRUE
                    )

sim.man <- GetPreds(xmat           = rel.data,
                    coefs          = b.rm,
                    covmat         = vcov.rm,
                    var.change     = 19,
                    interaction    = NULL,
                    change.lo      = rawman.min,
                    change.hi      = rawman.max,
                    change.vec     = NULL,
                    var.sq         = NULL,
                    app.fn         = val.vec,
                    tval           = 1.96,
                    n.out          = 1000,
                    ci             = TRUE
                    )

raw.df <- data.frame(change     = sim.raw$change,
                     pred       = sim.raw$pred,
                     lo         = sim.raw$lo,
                     hi         = sim.raw$hi
                     )

man.df <- data.frame(change     = sim.man$change,
                     pred       = sim.man$pred,
                     lo         = sim.man$lo,
                     hi         = sim.man$hi
                     )

both.df <- data.frame(change     = c(sim.raw$change,
                                     sim.man$change
                                     ),
                      pred       = c(sim.raw$pred,
                                     sim.man$pred
                                     ),
                      lo         = c(sim.raw$lo,
                                     sim.man$lo
                                     ),
                      hi         = c(sim.raw$hi,
                                     sim.man$hi
                                     ),
                      Type       = rep(c("Raw",
                                         "Manufactured"
                                         ),
                                       each = length(sim.raw$change)
                                       )
                      )

#################
# Create Fig. 3 #
#################

ggplot(data = both.df,
       aes(x      = change,
           y      = pred,
           colour = Type
           )
       ) +
       geom_line(size   = 1.25) +
       geom_ribbon(data = both.df,
                   aes(ymin = lo,
                       ymax = hi,
                       fill = Type
                       ),
                   alpha  = 0.3,
                   colour = NA
                   ) +
       xlab("Portfolio Similarity") +
       ylab("Probability of Conflict") +
       scale_colour_discrete(name = "Export Type") +         
       scale_fill_discrete(name = "Export Type") +
       theme_bw()


